# -*- coding:utf-8 -*-
"""
__author__='Wang Bo'
email:joker_wb@163.com
"""

# import requests
# from urllib import request
# import re
# def getNovelSortList():
#     a='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
#     header={'user_Agent':a}
#     url='http://www.quanshuwang.com/list/1_1.html'
#     url1='http://www.baidu.com'
#     # response=requests.request("GET",url1)
#     req=request.Request(url,headers=header)
#     response=request.urlopen(req)
#     result=response.read().decode('gbk')
#
#
#     reg=r'<a target="_blank" title=".*?" href="(.*?)" class="clearfix stitle">(.*?)</a>'
#     novel_url_list=re.findall(reg,result)
#     print(novel_url_list)
#
# getNovelSortList()
#




# import requests
# import re
# def getNovelSortList():
#     response=requests.get('http://www.quanshuwang.com/list/1_1.html')
#     response.encoding='gbk'
#     result=response.text
#     reg=r'<a target="_blank" title=".*?" href="(.*?)" class="clearfix stitle">(.*?)</a>'
#     novel_url_list=re.findall(reg,result)
#     print(novel_url_list)
#
# getNovelSortList()


from urllib import request
import requests
import re
import time

def getNovelSortList(url='http://www.quanshuwang.com/list/1_1.html', *, timeout=10):
    """Fetch a novel listing page and return the novels linked on it.

    Args:
        url: Address of the listing page to download. Defaults to the page
            this function previously had hard-coded.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot block the caller forever.

    Returns:
        A list of ``(novel_url, novel_name)`` tuples, one for every anchor
        on the page that matches the listing pattern, in page order. The
        list is empty when nothing matches.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    # Browser agent string (visible in the browser's F12 network panel).
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) '
                  'Gecko/20100101 Firefox/58.0')
    # The header name must be spelled 'User-Agent'. The earlier 'user_Agent'
    # key was sent as an unrelated header, so urllib's default agent string
    # was still what the server saw.
    headers = {'User-Agent': user_agent}
    req = request.Request(url, headers=headers)

    # The context manager closes the response even if reading or decoding
    # raises.
    with request.urlopen(req, timeout=timeout) as response:
        html = response.read().decode('gbk')

    # Group 1 is the novel's URL, group 2 is the anchor text (its name).
    pattern = (r'<a target="_blank" title=".*?" href="(.*?)" '
               r'class="clearfix stitle">(.*?)</a>')
    return re.findall(pattern, html)
# If the output contains unwanted data, the regular expression is incomplete.
# The function returns a list.
# for i in getNovelSortList():
#     print(i[0])  # prints the URL

def get_novel_content(url, timeout=10):
    """Return the reader URL linked from a novel's page.

    Downloads ``url`` with ``requests``, decodes the body as GBK and looks
    for the anchor with class ``reader`` whose text is the site's
    "start reading" label.

    Args:
        url: Address of the novel's page.
        timeout: Seconds ``requests`` waits for the server. Without a
            timeout a stalled connection would block indefinitely.

    Returns:
        The ``href`` value of the first matching anchor.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched.
        IndexError: If the page has no matching anchor. The exception type
            is the one the previous ``findall(...)[0]`` lookup produced;
            it now carries a message naming the page.
    """
    response = requests.get(url, timeout=timeout)
    # Set the encoding explicitly so ``response.text`` decodes as GBK.
    response.encoding = 'gbk'

    pattern = r'<a href="(.*?)" class="reader" title=".*?">开始阅读</a>'
    match = re.search(pattern, response.text)
    if match is None:
        raise IndexError('no reader link found on page %r' % (url,))
    return match.group(1)

def get_chapter_url_list(url, *, delay=0.5, timeout=10):
    """Fetch a chapter index page and return the chapters linked on it.

    Args:
        url: Address of the chapter index page.
        delay: Seconds to sleep before issuing the request. Defaults to the
            0.5 s pause this function has always applied; pass ``0`` to
            skip it.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        A list of ``(chapter_href, chapter_title)`` tuples, one for every
        ``<li><a ...>`` entry matching the chapter pattern, in page order.
        The list is empty when nothing matches.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    if delay > 0:
        time.sleep(delay)

    # Browser agent string (visible in the browser's F12 network panel).
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) '
                  'Gecko/20100101 Firefox/58.0')
    # The header name must be spelled 'User-Agent'. The earlier 'user_Agent'
    # key was sent as an unrelated header, so urllib's default agent string
    # was still what the server saw.
    headers = {'User-Agent': user_agent}
    req = request.Request(url, headers=headers)

    # The context manager closes the response even if reading or decoding
    # raises.
    with request.urlopen(req, timeout=timeout) as response:
        html = response.read().decode('gbk')

    # Group 1 is the chapter link, group 2 is the anchor text.
    pattern = r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>'
    return re.findall(pattern, html)

# Driver: process only the first novel on the listing page. Print its reader
# URL, then print each chapter entry with a two-second pause after each one.
# The one-element slice restricts the loop to the first listed novel.
for novel_url, novel_name in getNovelSortList()[:1]:
    novel_content_url = get_novel_content(novel_url)
    print(novel_content_url)

    for i in get_chapter_url_list(novel_content_url):
        print(i)
        time.sleep(2)